# download_jitm_issue_live.py
# JITM (Journal of Information Technology Management) Downloader
# Automates downloading PDFs from JITM HTML issue pages
# - Parses issue page for article titles and direct PDF links
# - Handles both regular issues and multiple "Special Issues" (shortened to "SI" in folder names)
# - Creates dynamic folders like JITM_Vol14_Issue1_2022 or JITM_Vol14_SI_Security_and_Resource_Management_2022
# - Saves each PDF with sanitized filenames for cross-platform compatibility
# - Displays numbered progress messages for each saved file
# - Logs all downloads into a CSV file


import os
import re
import csv
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def get_soup(url, timeout=30):
    """Fetch *url* and return its HTML parsed into a BeautifulSoup tree.

    Args:
        url: Address of the issue page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a stalled server
            would hang the whole script.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    print("[INFO] Fetching issue page…")
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    return BeautifulSoup(r.text, "html.parser")

def sanitize_filename(name):
    """Return *name* with characters that are unsafe in filenames replaced.

    Each of the reserved characters ``\\ / * ? : " < > |`` and every ASCII
    control character (newline, tab, NUL, DEL, ...) is replaced by an
    underscore. Control characters matter because scraped titles can contain
    embedded line breaks, which are not valid in Windows filenames.

    Args:
        name: The raw text to turn into a filename component.

    Returns:
        The sanitized text, or ``"untitled"`` when the result would be empty
        or whitespace-only (which would otherwise produce a bare extension
        such as ``.pdf``).
    """
    cleaned = re.sub(r'[\\/*?:"<>|\x00-\x1f\x7f]', "_", name)
    return cleaned if cleaned.strip() else "untitled"

def _folder_name_from_soup(soup):
    """Build the download folder name from the issue page's volume header."""
    fallback = "JITM_UnknownIssue"
    vol_info_div = soup.find("div", class_="weight-200 nomargin-top")
    if not vol_info_div:
        return fallback

    vol_text = vol_info_div.get_text(strip=True)
    m = re.search(r"Volume\s+(\d+),\s*(.*?)\s+(\d{4})", vol_text)
    if not m:
        return fallback

    vol_num, issue_part, year = m.groups()
    issue_part = issue_part.strip().replace(":", "_").replace(",", "")
    # Replace Special Issue with SI
    issue_part = issue_part.replace("Special Issue", "SI")
    folder_name = f"JITM_Vol{vol_num}_{issue_part}_{year}".replace(" ", "_")
    # The issue text is scraped from the page, so strip path separators and
    # other reserved characters before it is used as a directory name.
    return sanitize_filename(folder_name)


def _collect_articles(soup, issue_url):
    """Return a list of (title, absolute PDF URL) pairs found on the page."""
    articles = []
    for h5 in soup.find_all("h5", class_="list-article-title"):
        a_tag = h5.find("a", href=True)
        if not a_tag:
            continue
        # find_next() returns None when no <ul> follows the heading; skip the
        # entry instead of crashing with AttributeError.
        link_list = h5.find_next("ul")
        if link_list is None:
            continue
        li_pdf = link_list.find("a", class_="pdf_link", href=True)
        if li_pdf:
            title = a_tag.get_text(strip=True)
            pdf_url = urljoin(issue_url, li_pdf["href"])
            articles.append((title, pdf_url))
    return articles


def main():
    """Prompt for an issue URL, download every PDF it lists, and log results.

    Creates a folder named after the issue's volume/issue/year (or
    ``JITM_UnknownIssue`` when that header cannot be parsed), saves each PDF
    under its sanitized article title, and writes one CSV row per article
    with an ``OK`` or ``ERROR: ...`` status.
    """
    issue_url = input("Paste JITM issue URL: ").strip()
    if not issue_url:
        print("[ERROR] No URL provided.")
        return
    soup = get_soup(issue_url)

    # Extract volume/issue/year text
    folder_name = _folder_name_from_soup(soup)
    os.makedirs(folder_name, exist_ok=True)

    # Find articles and PDFs
    articles = _collect_articles(soup, issue_url)
    print(f"[INFO] Found {len(articles)} PDFs on this issue page")

    saved = 0
    used_stems = set()
    log_path = os.path.join(folder_name, f"{folder_name}_log.csv")
    with open(log_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["#", "Title", "PDF_URL", "Status"])
        for idx, (title, pdf_url) in enumerate(articles, start=1):
            stem = sanitize_filename(title)
            # Two articles can share a title (e.g. "Editorial"); suffix the
            # article number so the later one does not overwrite the earlier.
            # Compared case-insensitively for case-insensitive filesystems.
            if stem.casefold() in used_stems:
                stem = f"{stem}_{idx}"
            used_stems.add(stem.casefold())
            filename = stem + ".pdf"
            filepath = os.path.join(folder_name, filename)
            try:
                # Timeout so one stalled download cannot hang the whole run.
                r = requests.get(pdf_url, timeout=60)
                r.raise_for_status()
                with open(filepath, "wb") as pdf_file:
                    pdf_file.write(r.content)
            except Exception as e:
                # Deliberate best-effort: record the failure and keep going
                # with the remaining articles.
                print(f"[{idx}] ❌ ERROR: {title} -> {e}")
                writer.writerow([idx, title, pdf_url, f"ERROR: {e}"])
            else:
                saved += 1
                print(f"[{idx}] ✅ Saved: {filename}")
                writer.writerow([idx, title, pdf_url, "OK"])

    # Report the number actually saved, not merely the number attempted.
    print(f"Done! {saved} of {len(articles)} PDFs saved in {folder_name}")
    print(f"Log: {log_path}")

# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
